library(terra)
library(sf)
library(tidyverse)
library(tidyterra)
library(leaflet)
library(knitr)
knitr::opts_knit$set(root.dir = "/Users/Anthony/Data and Analysis Local/NYS_Wetlands_GHG/")
knitr::opts_chunk$set(message = FALSE, cache = TRUE)
here::i_am("NWI_training_data.qmd")
library(here)
set.seed(11)Test Training Data Generation
This is all data downloaded to a local folder
- reprojection is used to harmonize between the different sources. Right now that is just the HUCs layer which was the largest
ny_nwi <- vect("Data/NWI/NY_shapefile_wetlands/NY_Wetlands.shp") |> terra::project("EPSG:4269")
ny_state <- vect("Data/NWI/NY_shapefile_wetlands/New York.shp") |> terra::project("EPSG:4269")
ny_hucs_1 <- vect("Data/WBD_04_HU2_GPKG/WBD_04_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs_2 <- vect("Data/WBD_02_HU2_GPKG/WBD_02_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs_3 <- vect("Data/WBD_05_HU2_GPKG/WBD_05_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs <- terra::union(ny_hucs_1, ny_hucs_2) |> terra::union(y = ny_hucs_3) |>
dplyr::select(!ends_with("_1")) |>
dplyr::select(!ends_with("_2"))
writeVector(ny_hucs, "Data/NY_HUCS/NY_HUCS_08.gpkg", overwrite = TRUE)This is for one HUC
test_huc <- ny_hucs |> dplyr::filter(str_detect(huc12, "020501020703"))
test_nwi_all <- ny_nwi |> terra::crop(y = test_huc)
test_nwi_subset <- test_nwi_all |>
dplyr::filter(WETLAND_TY == "Freshwater Forested/Shrub Wetland" | WETLAND_TY == "Freshwater Emergent Wetland")Create training data by spatSampleing the HUC and then extracting the wetland and upland points
- The wetlands are just the Freshwater Forested/Shrub and the Freshwater Emergent
- The uplands are outside of all of the wetlands including surface water
The number of points could depend on the size of the Watershed
test_nwi_pts <- terra::spatSample(test_huc, method = "random", size = 1E4) # 1E4 = 10000
test_nwi_pts_wet <- terra::mask(test_nwi_pts, test_nwi_subset, inverse = FALSE) |>
terra::intersect(x = test_nwi_subset |> terra::buffer(-1)) |> # This attributes the points with NWI classes
dplyr::mutate(MOD_CLASS = case_when(WETLAND_TY == "Freshwater Forested/Shrub Wetland" ~ "FSW",
.default = "EMW"),
COARSE_CLASS = "WET") |>
dplyr::select(MOD_CLASS, COARSE_CLASS)
test_nwi_pts_upl <- terra::mask(test_nwi_pts, test_nwi_all |> buffer(5), inverse = TRUE) |>
dplyr::mutate(MOD_CLASS = "UPL",
COARSE_CLASS = "UPL") |>
dplyr::select(MOD_CLASS, COARSE_CLASS)
test_nwi_pts_all <- rbind(test_nwi_pts_upl, test_nwi_pts_wet)
glimpse(test_nwi_all)# A SpatVector 679 x 5
# Geometry type: Polygons
# Geodetic CRS: lon/lat NAD83 (EPSG:4269)
# Extent (x / y) : ([75° 53' 47.49" W / 75° 46' 5.74" W] , [42° 23' 40.28" N / 42° 33' 7.63" N])
$ ATTRIBUTE <chr> "L1UBH", "L1UBHh", "L1UBHh", "PEM1/SS1E", "PEM1/SS1E", "PEM…
$ WETLAND_TY <chr> "Lake", "Lake", "Lake", "Freshwater Emergent Wetland", "Fre…
$ ACRES <dbl> 24.28460328, 112.72737950, 115.56355652, 1.46909221, 3.1166…
$ Shape_Leng <dbl> 1325.91388, 3933.78724, 4786.09754, 419.80582, 445.64377, 4…
$ Shape_Area <dbl> 98276.3029, 456191.5206, 467669.1218, 5945.2052, 12612.4960…
test_nwi_pts_all |> as.data.frame() |> dplyr::group_by(MOD_CLASS) |> dplyr::summarise(count = n())# A tibble: 3 × 2
MOD_CLASS count
<chr> <int>
1 EMW 49
2 FSW 438
3 UPL 9114
Sometimes there are not enough points for a specific class. In this case this is EMW or emergent wetlands
We can spatSample for more emergent from the NWI data layer.
- This adds points but the size appears to be relative. So 1000 points ~ 150 actual points
What’s better (I think…) is to convert the Emergent polygons to points then sub-sample 500 of them.
- Or the number of polygons which could/could not be a good idea
numEMW <- length(test_nwi_subset[test_nwi_subset$WETLAND_TY == "Freshwater Emergent Wetland"])
suppPoints <- test_nwi_subset |>
dplyr::filter(str_detect(WETLAND_TY, "Emergent")) |>
terra::buffer(-10) |> # a negative buffer should remove points on the lines
terra::as.points() |>
sample(size = c(numEMW, 100)[which.max(c(numEMW, 100))]) |>
dplyr::mutate(MOD_CLASS = "EMW",
COARSE_CLASS = "WET") |>
dplyr::select(MOD_CLASS, COARSE_CLASS)
test_nwi_pts_all_supp <- rbind(test_nwi_pts_all, suppPoints)
test_nwi_pts_all_supp |> as.data.frame() |> dplyr::group_by(MOD_CLASS) |> dplyr::summarise(count = n())# A tibble: 3 × 2
MOD_CLASS count
<chr> <int>
1 EMW 149
2 FSW 438
3 UPL 9114
This is a visualization to show the points and NWI polygons
s <- svc(test_nwi_subset, test_nwi_pts_all_supp)
names(s) <- c("Wetland Polygons", "Wetland Points")
plet(s, c("WETLAND_TY", "MOD_CLASS"))Warning: SpatVector layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
Need '+proj=longlat +datum=WGS84'
Warning: SpatVector layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
Need '+proj=longlat +datum=WGS84'